package com.bzchao.poiall;

import org.apache.commons.io.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Entities;
import org.junit.jupiter.api.Test;

import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * Converts an HTML classpath resource into XML-syntax (XHTML) markup using jsoup
 * and writes the result to a file.
 */
public class HtmlToXHtmlJsoup {

    /**
     * Parses {@code /taskReport_docx4j.html} from the classpath, switches the document's
     * output settings to XML syntax with XHTML entity escaping, and writes the serialized
     * markup to {@code taskReport_docx4j.xhtml} (a relative path, so it resolves against
     * the current working directory).
     *
     * @throws IllegalStateException if the HTML resource is not found on the classpath
     * @throws Exception             if reading the resource or writing the output file fails
     */
    @Test
    public void testJsoup() throws Exception {
        Document doc;
        // try-with-resources guarantees the classpath stream is closed even if parsing fails.
        try (InputStream input = this.getClass().getResourceAsStream("/taskReport_docx4j.html")) {
            // getResourceAsStream returns null (rather than throwing) for a missing resource;
            // fail with a clear message instead of an opaque error from inside the parser.
            if (input == null) {
                throw new IllegalStateException(
                        "Classpath resource not found: /taskReport_docx4j.html");
            }
            doc = Jsoup.parse(input, "utf-8", "");
        }

        // Serialize as well-formed XML and escape using the XHTML entity set.
        doc.outputSettings()
                .syntax(Document.OutputSettings.Syntax.xml)
                .escapeMode(Entities.EscapeMode.xhtml);

        // Open the output only after a successful parse, and always close (and thereby
        // flush) the file handle once the content has been written.
        try (OutputStream outputStream = new FileOutputStream("taskReport_docx4j.xhtml")) {
            IOUtils.write(doc.html(), outputStream, "utf-8");
        }
    }

}
